msg_tool\scripts\bgi\archive/
dsc.rs

1//! Buriko General Interpreter/Ethornell compressed file in archive
2use crate::ext::io::*;
3use crate::ext::vec::*;
4use crate::scripts::base::*;
5use crate::types::*;
6use crate::utils::bit_stream::*;
7use crate::utils::num_range::*;
8use anyhow::Result;
9use rand::RngExt;
10use std::io::{BufWriter, Seek, Write};
11
12#[derive(Debug)]
13struct HuffmanCode {
14    code: u16,
15    depth: u8,
16}
17
18impl std::cmp::PartialEq for HuffmanCode {
19    fn eq(&self, other: &Self) -> bool {
20        self.code == other.code && self.depth == other.depth
21    }
22}
23
24impl std::cmp::Eq for HuffmanCode {}
25
26impl std::cmp::PartialOrd for HuffmanCode {
27    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
28        let cmp = self.depth.cmp(&other.depth);
29        if cmp == std::cmp::Ordering::Equal {
30            Some(self.code.cmp(&other.code))
31        } else {
32            Some(cmp)
33        }
34    }
35}
36
37impl std::cmp::Ord for HuffmanCode {
38    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
39        let cmp = self.depth.cmp(&other.depth);
40        if cmp == std::cmp::Ordering::Equal {
41            self.code.cmp(&other.code)
42        } else {
43            cmp
44        }
45    }
46}
47
48#[derive(Clone, Debug)]
49struct HuffmanNode {
50    is_parent: bool,
51    code: Option<u16>,
52    left_index: usize,
53    right_index: usize,
54}
55
56/// Decoder for Buriko General Interpreter/Ethornell compressed files (DSC format).
57pub struct DscDecoder<'a> {
58    stream: MsbBitStream<MemReaderRef<'a>>,
59    key: u32,
60    magic: u32,
61    output_size: u32,
62    dec_count: u32,
63}
64
65impl<'a> DscDecoder<'a> {
66    /// Creates a new DscDecoder from the given data slice.
67    pub fn new(data: &'a [u8]) -> Result<Self> {
68        let mut reader = MemReaderRef::new(data);
69        let magic = (reader.read_u16()? as u32) << 16;
70        reader.pos = 0x10;
71        let key = reader.read_u32()?;
72        let output_size = reader.read_u32()?;
73        let dec_count = reader.read_u32()?;
74        let stream = MsbBitStream::new(reader);
75        Ok(DscDecoder {
76            stream,
77            key,
78            magic,
79            output_size,
80            dec_count,
81        })
82    }
83
84    /// Unpacks the DSC file and returns the decompressed data.
85    pub fn unpack(mut self) -> Result<Vec<u8>> {
86        self.stream.m_input.pos = 0x20;
87        let mut codes = Vec::new();
88        for i in 0..512 {
89            let src = self.stream.m_input.read_u8()?;
90            let depth = src.overflowing_sub(self.update_key()).0;
91            if depth > 0 {
92                codes.push(HuffmanCode { code: i, depth })
93            }
94        }
95        codes.sort();
96        let root = Self::create_huffman_tree(codes);
97        self.huffman_decompress(root)
98    }
99
100    fn create_huffman_tree(codes: Vec<HuffmanCode>) -> Vec<HuffmanNode> {
101        let mut trees = Vec::with_capacity(1024);
102        trees.resize(
103            1024,
104            HuffmanNode {
105                is_parent: false,
106                code: None,
107                left_index: 0,
108                right_index: 0,
109            },
110        );
111        let mut left_index = vec![0usize; 512];
112        let mut right_index = vec![0usize; 512];
113        let mut next_node_index = 1usize;
114        let mut depth_nodes = 1usize;
115        let mut depth = 0u8;
116        let mut left_child = true;
117        let mut n = 0;
118        while n < codes.len() {
119            let huffman_node_index = left_child;
120            left_child = !left_child;
121            let mut depth_existed_nodes = 0;
122            while n < codes.len() && codes[n].depth == depth {
123                let index = if huffman_node_index {
124                    left_index[depth_existed_nodes]
125                } else {
126                    right_index[depth_existed_nodes]
127                };
128                trees[index].code = Some(codes[n].code);
129                n += 1;
130                depth_existed_nodes += 1;
131            }
132            let depth_nodes_to_create = depth_nodes - depth_existed_nodes;
133            for i in 0..depth_nodes_to_create {
134                let index = if huffman_node_index {
135                    left_index[depth_existed_nodes + i]
136                } else {
137                    right_index[depth_existed_nodes + i]
138                };
139                let node = &mut trees[index];
140                node.is_parent = true;
141                if left_child {
142                    left_index[i * 2] = next_node_index;
143                    node.left_index = next_node_index;
144                    next_node_index += 1;
145                    left_index[i * 2 + 1] = next_node_index;
146                    node.right_index = next_node_index;
147                    next_node_index += 1;
148                } else {
149                    right_index[i * 2] = next_node_index;
150                    node.left_index = next_node_index;
151                    next_node_index += 1;
152                    right_index[i * 2 + 1] = next_node_index;
153                    node.right_index = next_node_index;
154                    next_node_index += 1;
155                }
156            }
157            depth += 1;
158            depth_nodes = depth_nodes_to_create * 2;
159        }
160        trees
161    }
162
163    fn huffman_decompress(&mut self, nodes: Vec<HuffmanNode>) -> Result<Vec<u8>> {
164        let output_size = self.output_size as usize;
165        let mut output = Vec::with_capacity(output_size);
166        let mut dst = 0;
167        output.resize(output_size, 0);
168        for _ in 0..self.dec_count {
169            let mut current_node = &nodes[0];
170            loop {
171                let bit = self.stream.get_next_bit()?;
172                if !bit {
173                    current_node = &nodes[current_node.left_index]
174                } else {
175                    current_node = &nodes[current_node.right_index]
176                }
177                if !current_node.is_parent {
178                    break;
179                }
180            }
181            let code = *current_node.code.as_ref().unwrap();
182            if code >= 256 {
183                let mut offset = self.stream.get_bits(12)?;
184                let count = ((code & 0xFF) + 2) as usize;
185                offset += 2;
186                output.copy_overlapped(dst - offset as usize, dst, count);
187                dst += count;
188            } else {
189                output[dst] = code as u8;
190                dst += 1;
191            }
192        }
193        if dst != output_size {
194            eprintln!(
195                "Warning: Output size mismatch, expected {}, got {}",
196                self.output_size, dst
197            );
198            crate::COUNTER.inc_warning();
199        }
200        Ok(output)
201    }
202
203    fn update_key(&mut self) -> u8 {
204        let v0 = 20021 * (self.key & 0xffff);
205        let mut v1 = self.magic | (self.key >> 16);
206        v1 = v1
207            .overflowing_mul(20021)
208            .0
209            .overflowing_add(self.key.overflowing_mul(346).0)
210            .0;
211        v1 = overf::wrapping!(v1 + (v0 >> 16)) & 0xffff;
212        self.key = (v1 << 16) + (v0 & 0xffff) + 1;
213        v1 as u8
214    }
215}
216
217#[derive(Debug, Clone, Copy)]
218enum LzssOp {
219    Literal(u8),
220    Match { len: u16, offset: u16 },
221}
222
223#[derive(Debug, Clone, Copy, PartialEq, Eq)]
224pub enum MatchMode {
225    Store,
226    Rle,
227    NonLazy,
228    Lazy,
229    Optimal, // 新增：最优解析模式
230}
231
232#[derive(Debug, Clone, Copy)]
233pub struct CompressConfig {
234    pub good_length: usize,
235    pub max_lazy: usize,
236    pub nice_length: usize,
237    pub max_chain: usize,
238    pub mode: MatchMode,
239}
240
241pub const COMPRESS_CONFIGS: [CompressConfig; 11] = [
242    // 0: Store (No compression)
243    CompressConfig {
244        good_length: 0,
245        max_lazy: 0,
246        nice_length: 0,
247        max_chain: 0,
248        mode: MatchMode::Store,
249    },
250    // 1: RLE (Fastest) - Matches repeated patterns directly
251    CompressConfig {
252        good_length: 4,
253        max_lazy: 0,
254        nice_length: 8,
255        max_chain: 0,
256        mode: MatchMode::Rle,
257    },
258    // 2: RLE
259    CompressConfig {
260        good_length: 4,
261        max_lazy: 0,
262        nice_length: 16,
263        max_chain: 0,
264        mode: MatchMode::Rle,
265    },
266    // 3: Non-lazy match
267    CompressConfig {
268        good_length: 4,
269        max_lazy: 0,
270        nice_length: 32,
271        max_chain: 8,
272        mode: MatchMode::NonLazy,
273    },
274    // 4: Non-lazy match
275    CompressConfig {
276        good_length: 4,
277        max_lazy: 0,
278        nice_length: 64,
279        max_chain: 16,
280        mode: MatchMode::NonLazy,
281    },
282    // 5: Lazy match
283    CompressConfig {
284        good_length: 8,
285        max_lazy: 16,
286        nice_length: 32,
287        max_chain: 32,
288        mode: MatchMode::Lazy,
289    },
290    // 6: Lazy match
291    CompressConfig {
292        good_length: 8,
293        max_lazy: 16,
294        nice_length: 128,
295        max_chain: 128,
296        mode: MatchMode::Lazy,
297    },
298    // 7: Lazy match
299    CompressConfig {
300        good_length: 8,
301        max_lazy: 32,
302        nice_length: 128,
303        max_chain: 256,
304        mode: MatchMode::Lazy,
305    },
306    // 8: Lazy match
307    CompressConfig {
308        good_length: 32,
309        max_lazy: 128,
310        nice_length: 258,
311        max_chain: 1024,
312        mode: MatchMode::Lazy,
313    },
314    // 9: Lazy match (Best)
315    CompressConfig {
316        good_length: 32,
317        max_lazy: 258,
318        nice_length: 258,
319        max_chain: 4096,
320        mode: MatchMode::Lazy,
321    },
322    // 10: Optimal (Zopfli-like) - 穷举所有可能以找到最优解
323    CompressConfig {
324        good_length: 258,
325        max_lazy: 258,
326        nice_length: 258,
327        max_chain: 4096,
328        mode: MatchMode::Optimal,
329    },
330];
331
332/// Computes optimal length-limited Huffman code depths using the Package-Merge algorithm.
333fn package_merge(freqs: &[u32], max_len: u8) -> Vec<u8> {
334    let max_len = max_len as usize;
335    let mut depths = vec![0u8; freqs.len()];
336
337    let mut symbols: Vec<(u64, Vec<usize>)> = freqs
338        .iter()
339        .enumerate()
340        .filter(|&(_, &f)| f > 0)
341        .map(|(i, &f)| (f as u64, vec![i]))
342        .collect();
343
344    let n = symbols.len();
345    if n == 0 {
346        return depths;
347    }
348    if n == 1 {
349        depths[symbols[0].1[0]] = 1;
350        return depths;
351    }
352
353    symbols.sort_by_key(|x| x.0);
354    let mut prev_list = symbols.clone();
355
356    for _ in 1..max_len {
357        let mut current_list = symbols.clone();
358        for p in 0..(prev_list.len() / 2) {
359            let left = &prev_list[p * 2];
360            let right = &prev_list[p * 2 + 1];
361
362            let combined_weight = left.0 + right.0;
363            let mut combined_indices = Vec::with_capacity(left.1.len() + right.1.len());
364            combined_indices.extend_from_slice(&left.1);
365            combined_indices.extend_from_slice(&right.1);
366
367            current_list.push((combined_weight, combined_indices));
368        }
369        current_list.sort_by_key(|x| x.0);
370        prev_list = current_list;
371    }
372
373    let items_to_select = (2 * n).saturating_sub(2);
374    let take_count = std::cmp::min(items_to_select, prev_list.len());
375
376    for i in 0..take_count {
377        for &sym in &prev_list[i].1 {
378            depths[sym] += 1;
379        }
380    }
381    depths
382}
383
384fn calculate_huffman_depths(freqs: &[u32]) -> Vec<u8> {
385    const MAX_DEPTH: u8 = 9;
386    package_merge(freqs, MAX_DEPTH)
387}
388
389fn generate_canonical_codes(depths: &[u8]) -> Vec<Option<(u16, u8)>> {
390    let mut codes_with_depths = vec![];
391    for (symbol, &depth) in depths.iter().enumerate() {
392        if depth > 0 {
393            codes_with_depths.push((symbol as u16, depth));
394        }
395    }
396    codes_with_depths.sort_by(|a, b| {
397        let depth_cmp = a.1.cmp(&b.1);
398        if depth_cmp == std::cmp::Ordering::Equal {
399            a.0.cmp(&b.0)
400        } else {
401            depth_cmp
402        }
403    });
404
405    let mut huffman_codes = vec![None; 512];
406    let mut current_code = 0u16;
407    let mut last_depth = 0u8;
408
409    for &(symbol, depth) in &codes_with_depths {
410        if last_depth != 0 {
411            current_code <<= depth - last_depth;
412        }
413        huffman_codes[symbol as usize] = Some((current_code, depth));
414        current_code += 1;
415        last_depth = depth;
416    }
417    huffman_codes
418}
419
420#[inline(always)]
421fn get_match_length(a: &[u8], b: &[u8]) -> usize {
422    let max = a.len().min(b.len());
423    let mut len = 0;
424
425    // 每次比较 8 个字节，极大提升长文本或连续 0 的匹配效率
426    while len + 8 <= max {
427        let a_chunk = u64::from_le_bytes(a[len..len + 8].try_into().unwrap());
428        let b_chunk = u64::from_le_bytes(b[len..len + 8].try_into().unwrap());
429        if a_chunk != b_chunk {
430            let diff = a_chunk ^ b_chunk;
431            // 找出第一个不同的字节位置
432            return len + (diff.trailing_zeros() / 8) as usize;
433        }
434        len += 8;
435    }
436
437    // 比较剩余的尾部字节
438    while len < max && a[len] == b[len] {
439        len += 1;
440    }
441    len
442}
443
444#[inline(always)]
445fn find_match(
446    data: &[u8],
447    pos: usize,
448    head: &[i32],
449    prev: &[i32],
450    config: &CompressConfig,
451) -> (usize, usize) {
452    // 如果是 Store 或者连 2 个字节的匹配空间都不够了，直接返回
453    if config.mode == MatchMode::Store || pos + 2 > data.len() {
454        return (0, 0);
455    }
456
457    let max_len = (data.len() - pos).min(257);
458
459    // 针对 RLE 路径的极速优化
460    if config.mode == MatchMode::Rle {
461        if pos >= 2 {
462            // 极速前置检查：由于格式要求匹配至少2个字节，
463            // 只有当最前面的 2 个字节完全相同时，我们才进行切片和后续耗时的 SWAR 比对。
464            // 这排除了 99% 的无效调用。
465            if data[pos] == data[pos - 2] && data[pos + 1] == data[pos - 1] {
466                let src_slice = &data[pos..pos + max_len];
467                let match_slice = &data[pos - 2..pos - 2 + max_len];
468                let best_len = get_match_length(src_slice, match_slice);
469                return (best_len, 2);
470            }
471        }
472        return (0, 0);
473    }
474
475    let src_slice = &data[pos..pos + max_len];
476    let limit = pos.saturating_sub(4097);
477
478    // Level 3~10: 基于哈希字典进行跳跃搜索
479    let key = ((data[pos] as usize) << 8) | (data[pos + 1] as usize);
480    let mut match_pos_i32 = head[key];
481    let mut chain_length = config.max_chain;
482
483    let mut best_len = 0;
484    let mut best_offset = 0;
485
486    while match_pos_i32 != -1 && chain_length > 0 {
487        let match_pos = match_pos_i32 as usize;
488        if match_pos < limit {
489            break;
490        }
491
492        // 格式强制限制最小的 offset >= 2
493        if pos - match_pos < 2 {
494            match_pos_i32 = prev[match_pos];
495            chain_length -= 1;
496            continue;
497        }
498
499        let match_slice = &data[match_pos..match_pos + max_len];
500
501        // 高速剪枝：如果已知最长长度 best_len 处的那一个字节对不上，
502        // 说明这条链条再怎么强也不可能突破当前的 best_len，直接跳过整个字符串比对
503        if best_len > 0 && best_len < max_len {
504            if match_slice[best_len] != src_slice[best_len] {
505                match_pos_i32 = prev[match_pos];
506                chain_length -= 1;
507                continue;
508            }
509        }
510
511        let current_len = get_match_length(src_slice, match_slice);
512
513        if current_len > best_len {
514            best_len = current_len;
515            best_offset = pos - match_pos;
516            if current_len >= config.nice_length {
517                break;
518            }
519            if current_len >= config.good_length {
520                chain_length >>= 2;
521            }
522        }
523
524        match_pos_i32 = prev[match_pos];
525        chain_length -= 1;
526    }
527
528    if best_len >= 2 {
529        (best_len, best_offset)
530    } else {
531        (0, 0)
532    }
533}
534
535/// Zopfli-like Optimal Parsing
536/// 通过多次迭代动态规划，寻找全局最优的 LZSS 匹配路径
537fn optimal_parse(data: &[u8], config: &CompressConfig) -> Vec<LzssOp> {
538    let n = data.len();
539    if n == 0 {
540        return vec![];
541    }
542
543    // 预先计算每个位置的最长匹配，避免在 DP 迭代中重复搜索
544    let mut longest_matches = vec![(0usize, 0usize); n];
545    let mut head = vec![-1i32; 1 << 16];
546    let mut prev = vec![-1i32; n];
547    let insert_limit = n.saturating_sub(1);
548
549    for pos in 0..n {
550        let (best_len, best_offset) = find_match(data, pos, &head, &prev, config);
551        longest_matches[pos] = (best_len, best_offset);
552
553        if pos < insert_limit {
554            let key = ((data[pos] as usize) << 8) | (data[pos + 1] as usize);
555            prev[pos] = head[key];
556            head[key] = pos as i32;
557        }
558    }
559
560    // 初始代价：假设所有符号的 Huffman 编码长度均为 9 bits
561    let mut sym_costs = vec![9u32; 512];
562    let mut best_ops = vec![];
563
564    const NUM_ITERATIONS: usize = 4;
565    for iter in 0..NUM_ITERATIONS {
566        let mut costs = vec![u32::MAX; n + 1];
567        let mut links = vec![None; n + 1];
568        costs[0] = 0;
569
570        // 动态规划寻找最短路径
571        for i in 0..n {
572            let current_cost = costs[i];
573            if current_cost == u32::MAX {
574                continue;
575            }
576
577            // 1. 尝试字面量 (Literal)
578            let lit_sym = data[i] as usize;
579            let lit_cost = current_cost + sym_costs[lit_sym];
580            if lit_cost < costs[i + 1] {
581                costs[i + 1] = lit_cost;
582                links[i + 1] = Some(LzssOp::Literal(data[i]));
583            }
584
585            // 2. 尝试匹配 (Match)
586            let (max_len, offset) = longest_matches[i];
587            for len in 2..=max_len {
588                let match_sym = 256 + (len - 2);
589                // 匹配的代价 = 当前代价 + 长度符号的 Huffman 代价 + 固定的 12 bits 偏移量代价
590                let match_cost = current_cost + sym_costs[match_sym] + 12;
591                if match_cost < costs[i + len] {
592                    costs[i + len] = match_cost;
593                    links[i + len] = Some(LzssOp::Match {
594                        len: len as u16,
595                        offset: offset as u16,
596                    });
597                }
598            }
599        }
600
601        // 回溯构建操作序列
602        let mut ops = vec![];
603        let mut curr = n;
604        while curr > 0 {
605            let op = links[curr].unwrap();
606            ops.push(op);
607            curr -= match op {
608                LzssOp::Literal(_) => 1,
609                LzssOp::Match { len, .. } => len as usize,
610            };
611        }
612        ops.reverse();
613
614        if iter == NUM_ITERATIONS - 1 {
615            best_ops = ops;
616            break;
617        }
618
619        // 统计频率并更新 Huffman 树代价
620        let mut freqs = vec![0u32; 512];
621        for op in &ops {
622            match op {
623                LzssOp::Literal(b) => freqs[*b as usize] += 1,
624                LzssOp::Match { len, .. } => freqs[256 + (*len - 2) as usize] += 1,
625            }
626        }
627
628        let depths = calculate_huffman_depths(&freqs);
629        for i in 0..512 {
630            sym_costs[i] = if depths[i] > 0 {
631                depths[i] as u32
632            } else {
633                9 // 对于未使用的符号，赋予一个平均惩罚代价
634            };
635        }
636    }
637
638    best_ops
639}
640
641/// Encoder for Buriko General Interpreter/Ethornell compressed files (DSC format).
642pub struct DscEncoder<'a, T: Write + Seek> {
643    stream: MsbBitWriter<'a, BufWriter<T>>,
644    magic: u32,
645    key: u32,
646    dec_count: u32,
647    level: u8,
648}
649
650impl<'a, T: Write + Seek> DscEncoder<'a, T> {
651    /// Creates a new DscEncoder with the given writer and compression level (0-10).
652    pub fn new(writer: &'a mut BufWriter<T>, level: u8) -> Self {
653        let stream = MsbBitWriter::new(writer);
654        DscEncoder {
655            stream,
656            magic: 0x5344 << 16, // "DS"
657            key: rand::rng().random(),
658            dec_count: 0,
659            level: level.min(10),
660        }
661    }
662
663    /// Packs the given data into the DSC format using configured LZSS compression.
664    pub fn pack(mut self, data: &[u8]) -> Result<()> {
665        let config = &COMPRESS_CONFIGS[self.level as usize];
666
667        let ops = if config.mode == MatchMode::Optimal {
668            optimal_parse(data, config)
669        } else {
670            let mut ops = vec![];
671            let mut pos = 0;
672            // 预分配哈希表，65536 对应 2 bytes 的所有可能
673            let mut head = vec![-1i32; 1 << 16];
674            let mut prev = vec![-1i32; data.len()];
675            let insert_limit = data.len().saturating_sub(1); // 防止 data[p + 1] 越界
676
677            while pos < data.len() {
678                if config.mode == MatchMode::Store {
679                    ops.push(LzssOp::Literal(data[pos]));
680                    pos += 1;
681                    continue;
682                }
683
684                let (match_len, match_offset) = find_match(data, pos, &head, &prev, config);
685
686                if match_len >= 2 {
687                    let mut lazy_match = false;
688
689                    // 延迟匹配逻辑 (Lazy Evaluation)
690                    if config.mode == MatchMode::Lazy
691                        && match_len <= config.max_lazy
692                        && pos + 1 < data.len()
693                    {
694                        // 为下一次尝试预先将当前 pos 插入字典
695                        if pos < insert_limit {
696                            let key = ((data[pos] as usize) << 8) | (data[pos + 1] as usize);
697                            prev[pos] = head[key];
698                            head[key] = pos as i32;
699                        }
700
701                        let (next_len, _) = find_match(data, pos + 1, &head, &prev, config);
702
703                        if next_len > match_len {
704                            lazy_match = true;
705                        }
706                    }
707
708                    if lazy_match {
709                        ops.push(LzssOp::Literal(data[pos]));
710                        pos += 1;
711                        continue;
712                    }
713
714                    ops.push(LzssOp::Match {
715                        len: match_len as u16,
716                        offset: match_offset as u16,
717                    });
718
719                    let start_insert = if config.mode == MatchMode::Lazy
720                        && match_len <= config.max_lazy
721                        && pos + 1 < data.len()
722                    {
723                        1 // 如果进行了延迟检查，pos 已被插入，从 1 开始
724                    } else {
725                        0
726                    };
727
728                    // 批量插入字典，使用 usize 强制类型，移除闭包产生的隐式开销
729                    if config.mode != MatchMode::Rle {
730                        for i in start_insert..match_len {
731                            let p = pos + i;
732                            if p < insert_limit {
733                                let key = ((data[p] as usize) << 8) | (data[p + 1] as usize);
734                                prev[p] = head[key];
735                                head[key] = p as i32;
736                            }
737                        }
738                    }
739                    pos += match_len;
740                } else {
741                    ops.push(LzssOp::Literal(data[pos]));
742                    if config.mode != MatchMode::Rle && pos < insert_limit {
743                        let key = ((data[pos] as usize) << 8) | (data[pos + 1] as usize);
744                        prev[pos] = head[key];
745                        head[key] = pos as i32;
746                    }
747                    pos += 1;
748                }
749            }
750            ops
751        };
752
753        let symbols: Vec<u16> = ops
754            .iter()
755            .map(|op| match op {
756                LzssOp::Literal(byte) => *byte as u16,
757                LzssOp::Match { len, .. } => 256 + (len - 2),
758            })
759            .collect();
760        self.dec_count = symbols.len() as u32;
761
762        let mut freqs = vec![0u32; 512];
763        for &s in &symbols {
764            freqs[s as usize] += 1;
765        }
766
767        let depths = calculate_huffman_depths(&freqs);
768        let huffman_codes = generate_canonical_codes(&depths);
769
770        self.stream.writer.write_all(b"DSC FORMAT 1.00\0")?;
771        self.stream.writer.seek(std::io::SeekFrom::Start(0x10))?;
772        self.stream.writer.write_u32(self.key)?;
773        self.stream.writer.write_u32(data.len() as u32)?;
774        self.stream.writer.write_u32(self.dec_count)?;
775        self.stream.writer.seek(std::io::SeekFrom::Start(0x20))?;
776
777        for depth in depths.iter() {
778            let key = self.update_key();
779            self.stream.writer.write_u8(depth.overflowing_add(key).0)?;
780        }
781
782        for op in &ops {
783            match op {
784                LzssOp::Literal(byte) => {
785                    let symbol = *byte as u16;
786                    let (code, len) = huffman_codes[symbol as usize].unwrap();
787                    self.stream.put_bits(code as u32, len)?;
788                }
789                LzssOp::Match { len, offset } => {
790                    let symbol = 256 + (len - 2);
791                    let (code, huff_len) = huffman_codes[symbol as usize].unwrap();
792                    self.stream.put_bits(code as u32, huff_len)?;
793                    self.stream.put_bits((*offset - 2) as u32, 12)?;
794                }
795            }
796        }
797        self.stream.flush()?;
798        Ok(())
799    }
800
801    fn update_key(&mut self) -> u8 {
802        let v0 = 20021 * (self.key & 0xffff);
803        let mut v1 = self.magic | (self.key >> 16);
804        v1 = v1
805            .overflowing_mul(20021)
806            .0
807            .overflowing_add(self.key.overflowing_mul(346).0)
808            .0;
809        v1 = (v1 + (v0 >> 16)) & 0xffff;
810        self.key = (v1 << 16) + (v0 & 0xffff) + 1;
811        v1 as u8
812    }
813}
814
815#[derive(Debug)]
816/// Builder for DSC scripts.
817pub struct DscBuilder {}
818
819impl DscBuilder {
820    /// Creates a new instance of `DscBuilder`.
821    pub fn new() -> Self {
822        DscBuilder {}
823    }
824}
825
826impl ScriptBuilder for DscBuilder {
827    fn default_encoding(&self) -> Encoding {
828        Encoding::Cp932
829    }
830
831    fn default_archive_encoding(&self) -> Option<Encoding> {
832        Some(Encoding::Cp932)
833    }
834
835    fn build_script(
836        &self,
837        buf: Vec<u8>,
838        _filename: &str,
839        _encoding: Encoding,
840        _archive_encoding: Encoding,
841        config: &ExtraConfig,
842        _archive: Option<&Box<dyn Script>>,
843    ) -> Result<Box<dyn Script + Send + Sync>> {
844        Ok(Box::new(Dsc::new(buf, config)?))
845    }
846
847    fn extensions(&self) -> &'static [&'static str] {
848        &[]
849    }
850
851    fn script_type(&self) -> &'static ScriptType {
852        &ScriptType::BGIDsc
853    }
854
855    fn is_this_format(&self, _filename: &str, buf: &[u8], buf_len: usize) -> Option<u8> {
856        if buf_len >= 16 && buf.starts_with(b"DSC FORMAT 1.00\0") {
857            return Some(255);
858        }
859        None
860    }
861
862    fn can_create_file(&self) -> bool {
863        true
864    }
865
866    fn create_file<'a>(
867        &'a self,
868        filename: &'a str,
869        mut writer: Box<dyn WriteSeek + 'a>,
870        _encoding: Encoding,
871        _file_encoding: Encoding,
872        config: &ExtraConfig,
873    ) -> Result<()> {
874        let mut writer = std::io::BufWriter::new(&mut writer);
875        let encoder = DscEncoder::new(&mut writer, config.bgi_compress_level);
876        let data = crate::utils::files::read_file(filename)?;
877        encoder.pack(&data)?;
878        Ok(())
879    }
880}
881
882#[derive(Debug)]
883/// DSC script
884pub struct Dsc {
885    data: Vec<u8>,
886    level: u8,
887}
888
889impl Dsc {
890    /// Creates a new Dsc script
891    ///
892    /// * `buf` - The buffer containing the DSC data.
893    /// * `config` - Extra configuration options.
894    pub fn new(buf: Vec<u8>, config: &ExtraConfig) -> Result<Self> {
895        if buf.len() < 16 || !buf.starts_with(b"DSC FORMAT 1.00\0") {
896            return Err(anyhow::anyhow!("Invalid DSC format"));
897        }
898        let decoder = DscDecoder::new(&buf)?;
899        let data = decoder.unpack()?;
900        Ok(Dsc {
901            data,
902            level: config.bgi_compress_level,
903        })
904    }
905}
906
907impl Script for Dsc {
908    fn default_output_script_type(&self) -> OutputScriptType {
909        OutputScriptType::Custom
910    }
911
912    fn is_output_supported(&self, output: OutputScriptType) -> bool {
913        matches!(output, OutputScriptType::Custom)
914    }
915
916    fn default_format_type(&self) -> FormatOptions {
917        FormatOptions::None
918    }
919
920    fn custom_output_extension(&self) -> &'static str {
921        ""
922    }
923
924    fn custom_export(&self, filename: &std::path::Path, _encoding: Encoding) -> Result<()> {
925        let mut f = std::fs::File::create(filename)?;
926        f.write_all(&self.data)?;
927        Ok(())
928    }
929
930    fn custom_import<'a>(
931        &'a self,
932        custom_filename: &'a str,
933        mut file: Box<dyn WriteSeek + 'a>,
934        _encoding: Encoding,
935        _output_encoding: Encoding,
936    ) -> Result<()> {
937        let mut file = std::io::BufWriter::new(&mut file);
938        let encoder = DscEncoder::new(&mut file, self.level);
939        let data = crate::utils::files::read_file(custom_filename)?;
940        encoder.pack(&data)?;
941        Ok(())
942    }
943}
944
945/// Parses the compression level for LZSS compression from a string.
946pub fn parse_compress_level(level: &str) -> Result<u8, String> {
947    number_range(level, 0, 10).map(|v| v as u8)
948}
msg_tool\scripts\bgi\archive/dsc.rs

msg_tool\scripts\bgi\archive/
dsc.rs